library(ggplot2)
library(tidyverse)
library(dplyr)
library(forcats)
library(ggplot2)
Data is read and prepared in separate file. The variable for the type of voter is created by filtering individuals that lived in the same community for at least ten consecutive votes. For each individual the 10 most recent votes were selected. This allows for the classification of these individuals into “never voters” (0-1 participation), “selective voters” (2-8 participation) and “always voters” (9-10 participation).
TODO: initially I wanted to use source() to get prepared data files, but it takes extremely long (much longer than running the scripts themselves).. Why? Is there a good alternative, since now I have to do the levels and factorization again, since the csv doesn’t read them as programmed before…
setwd("/Users/alinelaurametzler/Documents/Universität/Master/Master Thesis/MA-Code/")
data <- read.csv("/Users/alinelaurametzler/Documents/Universität/Master/Master Thesis/MA-Code/Data/PreparedData/data.csv", encoding = "UTF-8") %>%
mutate(beteiligt = as.factor(beteiligt)) %>%
mutate(beteiligt = fct_relevel(beteiligt, c("keine Stimmbeteiligung", "mit Stimmbeteiligung"))) %>%
mutate(alter_c = fct_relevel(alter_c, c("18-30-Jährige","31-45-Jährige","46-60-Jährige","61-75-Jährige","Über 75-Jährige")),
sex = fct_relevel(sex, c("Mann", "Frau")),
konfession_c = fct_relevel(konfession_c, c("Andere/keine Konfession", "Christliche Konfession")),
residenz10 = fct_relevel(residenz10, c("0-10 Jahre", "Mehr als 10 Jahre")),
zugezogen = fct_relevel(zugezogen, c("In CH geboren","CH zugezogen")),
mEinkommen_c = fct_relevel(mEinkommen_c, c("0-25'000.-","25'000-55'000.-","55'000-90'000.-","Über 90'000.-")),
Vermoegen_c = fct_relevel(Vermoegen_c, c("0-8'000.-","8'000-60'000.-","60'000-185'000.-","Über 185'000.-")))
data_mlogit <- read.csv("/Users/alinelaurametzler/Documents/Universität/Master/Master Thesis/MA-Code/Data/PreparedData/data_mlogit.csv", encoding = "UTF-8") %>%
mutate(vote_type = as.factor(vote_type),
vote_type_det = as.factor(vote_type_det)) %>%
mutate(vote_type = fct_relevel(vote_type, c("never voter", "selective voter", "always voter")),
vote_type_det = fct_relevel(vote_type_det, c("never voter","seldom voter","occasional voter","frequent voter","always voter"))) %>%
mutate(alter_c = fct_relevel(alter_c, c("18-30-Jährige","31-45-Jährige","46-60-Jährige","61-75-Jährige","Über 75-Jährige")),
sex = fct_relevel(sex, c("Mann", "Frau")),
konfession_c = fct_relevel(konfession_c, c("Andere/keine Konfession", "Christliche Konfession")),
residenz10 = fct_relevel(residenz10, c("0-10 Jahre", "Mehr als 10 Jahre")),
zugezogen = fct_relevel(zugezogen, c("In CH geboren","CH zugezogen")),
mEinkommen_c = fct_relevel(mEinkommen_c, c("0-25'000.-","25'000-55'000.-","55'000-90'000.-","Über 90'000.-")),
Vermoegen_c = fct_relevel(Vermoegen_c, c("0-8'000.-","8'000-60'000.-","60'000-185'000.-","Über 185'000.-")))
data_mlogit15 <- read.csv("/Users/alinelaurametzler/Documents/Universität/Master/Master Thesis/MA-Code/Data/PreparedData/data_mlogit15.csv", encoding = "UTF-8")%>%
mutate(vote_type = as.factor(vote_type),
vote_type_det = as.factor(vote_type_det)) %>%
mutate(vote_type = fct_relevel(vote_type, c("never voter", "selective voter", "always voter")),
vote_type_det = fct_relevel(vote_type_det, c("never voter","seldom voter","occasional voter","frequent voter","always voter"))) %>%
mutate(alter_c = fct_relevel(alter_c, c("18-30-Jährige","31-45-Jährige","46-60-Jährige","61-75-Jährige","Über 75-Jährige")),
sex = fct_relevel(sex, c("Mann", "Frau")),
konfession_c = fct_relevel(konfession_c, c("Andere/keine Konfession", "Christliche Konfession")),
residenz10 = fct_relevel(residenz10, c("0-10 Jahre", "Mehr als 10 Jahre")),
zugezogen = fct_relevel(zugezogen, c("In CH geboren","CH zugezogen")),
mEinkommen_c = fct_relevel(mEinkommen_c, c("0-25'000.-","25'000-55'000.-","55'000-90'000.-","Über 90'000.-")),
Vermoegen_c = fct_relevel(Vermoegen_c, c("0-8'000.-","8'000-60'000.-","60'000-185'000.-","Über 185'000.-")))
# source("MA_adminDataPreparation.R", local = knitr::knit_global())
Hier folgt dann noch eine schöne Tabelle, evtl. mit Kummulierter Häufigkeit.
summary(data_mlogit$vote_type)
## never voter selective voter always voter
## 27830 57811 10847
summary(data_mlogit$vote_type_det)
## never voter seldom voter occasional voter frequent voter
## 27830 22405 15236 20170
## always voter
## 10847
data_table <- data_mlogit %>%
group_by(anz_teilnahme) %>%
summarise(count = n()) %>%
mutate(cum = cumsum(count)/sum(count)) %>%
rename(`Anzahl Teilnahme` = anz_teilnahme,
`Anzahl Individuen` = count,
`Kummulative Häufigkeit` = cum)
library(xtable)
table <- xtable(data_table)
# print(table)
library(reactable)
reactable(data_table,
defaultPageSize = 15,
columns = list(`Kummulative Häufigkeit` = colDef(format = colFormat(percent = TRUE, digits = 1))))
H1A/B
summary(data_mlogit$massgebendesEinkommen_imputed)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -4248547 33775 59712 69000 95689 193935 138
summary(data_mlogit$anz_teilnahme)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 3.000 4.028 8.000 10.000
plot_scatter <- data_mlogit %>%
mutate(massgebendesEinkommen_imputed = case_when(
massgebendesEinkommen_imputed < 0 ~ 0,
TRUE ~ massgebendesEinkommen_imputed
)) %>%
ggplot(aes(x = massgebendesEinkommen_imputed, y = anz_teilnahme)) +
# geom_bin2d(bins = 10)
geom_jitter(alpha = 0.1) +
geom_smooth() +
theme_minimal()
plot_scatter
plot_scatter_age <- data_mlogit %>%
filter(!is.na(alter_c)) %>%
mutate(massgebendesEinkommen_imputed = case_when(
massgebendesEinkommen_imputed < 0 ~ 0,
TRUE ~ massgebendesEinkommen_imputed
)) %>%
ggplot(aes(x = massgebendesEinkommen_imputed, y = anz_teilnahme)) +
geom_jitter(alpha = 0.1) +
geom_smooth() +
facet_wrap(~ alter_c) +
theme_minimal()
plot_scatter_age
plot_scatter_sex <- data_mlogit %>%
filter(!is.na(sex_c)) %>%
mutate(massgebendesEinkommen_imputed = case_when(
massgebendesEinkommen_imputed < 0 ~ 0,
TRUE ~ massgebendesEinkommen_imputed
)) %>%
ggplot(aes(x = massgebendesEinkommen_imputed, y = anz_teilnahme)) +
geom_jitter(alpha = 0.1) +
geom_smooth() +
facet_wrap(~ sex_c) +
theme_minimal()
plot_scatter_sex
Was ist der Unterschied zwischen mlogit() und multinom()? Weshalb funktioniert mlogit() nicht und multinom() schon?
H1.1A/B
# library(mlogit)
#
# # Generating the Person-Choice Matrix
# data_mlogitprep <- as.data.frame(data_mlogit) %>%
# select(vote_type, alter_c, sex_c, konfession_c, residenz10, zugezogen, mEinkommen_c, Vermoegen_c) %>%
# na.omit()
# data_mlogit.pc <- mlogit.data(data_mlogitprep,
# varying = 2:8,
# choice = "vote_type",
# shape = "wide",
# sep = "_")
# head(data_mlogit.pc)
#
# mlogit.fit <- mlogit(vote_type ~ 0 | alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
# shape = "long",
# data = data_mlogit.pc)
library(nnet)
library(stargazer)
mlogit <- multinom(vote_type ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = data_mlogit)
## # weights: 48 (30 variable)
## initial value 94135.692567
## iter 10 value 72238.419672
## iter 20 value 72086.064590
## iter 30 value 71209.595713
## final value 71055.884479
## converged
stargazer(mlogit, type = "text")
##
## ===============================================================
## Dependent variable:
## ----------------------------
## selective voter always voter
## (1) (2)
## ---------------------------------------------------------------
## alter_c31-45-Jährige -0.371*** -0.260***
## (0.026) (0.060)
##
## alter_c46-60-Jährige -0.349*** 0.183***
## (0.027) (0.058)
##
## alter_c61-75-Jährige -0.102*** 1.191***
## (0.029) (0.058)
##
## alter_cÜber 75-Jährige -0.498*** 0.834***
## (0.031) (0.060)
##
## sex_cMann 0.014 0.222***
## (0.017) (0.026)
##
## konfession_cChristliche Konfession 0.038** 0.111***
## (0.018) (0.030)
##
## residenz10Mehr als 10 Jahre 0.110*** 0.201***
## (0.019) (0.035)
##
## zugezogenCH zugezogen -0.640*** -1.098***
## (0.022) (0.042)
##
## mEinkommen_c25'000-55'000.- 0.101*** 0.297***
## (0.023) (0.050)
##
## mEinkommen_c55'000-90'000.- 0.400*** 0.943***
## (0.026) (0.050)
##
## mEinkommen_cÜber 90'000.- 0.838*** 1.696***
## (0.030) (0.053)
##
## Vermoegen_c8'000-60'000.- 0.444*** 0.687***
## (0.023) (0.059)
##
## Vermoegen_c60'000-185'000.- 0.791*** 1.221***
## (0.026) (0.058)
##
## Vermoegen_cÜber 185'000.- 1.212*** 1.893***
## (0.027) (0.056)
##
## Constant 0.022 -3.599***
## (0.030) (0.074)
##
## ---------------------------------------------------------------
## Akaike Inf. Crit. 142,171.800 142,171.800
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
mlogit_det <- multinom(vote_type_det ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = data_mlogit15)
## # weights: 80 (60 variable)
## initial value 122610.199045
## iter 10 value 112769.521412
## iter 20 value 112342.975927
## iter 30 value 111652.496082
## iter 40 value 110999.693572
## iter 50 value 110743.532181
## iter 60 value 110702.484393
## final value 110694.073162
## converged
stargazer(mlogit_det, type = "text")
##
## ============================================================================================
## Dependent variable:
## ---------------------------------------------------------
## seldom voter occasional voter frequent voter always voter
## (1) (2) (3) (4)
## --------------------------------------------------------------------------------------------
## alter_c31-45-Jährige -0.236*** -0.353*** -0.395*** -0.366***
## (0.035) (0.040) (0.042) (0.058)
##
## alter_c46-60-Jährige -0.334*** -0.267*** -0.092** 0.197***
## (0.036) (0.041) (0.042) (0.056)
##
## alter_c61-75-Jährige -0.366*** -0.062 0.496*** 1.339***
## (0.040) (0.044) (0.043) (0.055)
##
## alter_cÜber 75-Jährige -0.599*** -0.361*** 0.234*** 1.149***
## (0.043) (0.048) (0.046) (0.057)
##
## sex_cMann -0.005 0.005 0.094*** 0.279***
## (0.022) (0.024) (0.023) (0.024)
##
## konfession_cChristliche Konfession -0.003 0.009 0.028 0.058**
## (0.024) (0.027) (0.025) (0.028)
##
## residenz10Mehr als 10 Jahre 0.086*** -0.033 0.091*** 0.135***
## (0.026) (0.029) (0.029) (0.034)
##
## zugezogenCH zugezogen -0.403*** -0.726*** -0.928*** -1.148***
## (0.030) (0.035) (0.034) (0.038)
##
## mEinkommen_c25'000-55'000.- 0.139*** 0.052 0.087** 0.282***
## (0.032) (0.037) (0.037) (0.045)
##
## mEinkommen_c55'000-90'000.- 0.277*** 0.289*** 0.484*** 0.922***
## (0.035) (0.039) (0.038) (0.046)
##
## mEinkommen_cÜber 90'000.- 0.542*** 0.620*** 1.028*** 1.682***
## (0.040) (0.044) (0.042) (0.049)
##
## Vermoegen_c8'000-60'000.- 0.376*** 0.496*** 0.611*** 0.705***
## (0.032) (0.039) (0.040) (0.054)
##
## Vermoegen_c60'000-185'000.- 0.586*** 0.928*** 1.090*** 1.201***
## (0.035) (0.041) (0.042) (0.053)
##
## Vermoegen_cÜber 185'000.- 0.818*** 1.312*** 1.621*** 1.841***
## (0.036) (0.041) (0.041) (0.051)
##
## Constant -0.874*** -1.378*** -1.892*** -3.309***
## (0.043) (0.050) (0.052) (0.071)
##
## --------------------------------------------------------------------------------------------
## Akaike Inf. Crit. 221,508.100 221,508.100 221,508.100 221,508.100
## ============================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
H2
plot_scatter_ageincome <- data_mlogit %>%
filter(!is.na(mEinkommen_c)) %>%
mutate(massgebendesEinkommen_imputed = case_when(
massgebendesEinkommen_imputed < 0 ~ 0,
TRUE ~ massgebendesEinkommen_imputed
)) %>%
ggplot(aes(x = alter_v, y = anz_teilnahme)) +
geom_jitter(alpha = 0.1) +
geom_smooth() +
facet_wrap(~ mEinkommen_c) +
theme_minimal()
plot_scatter_ageincome
plot_scatter_agevermoegen <- data_mlogit %>%
filter(!is.na(mEinkommen_c)) %>%
mutate(massgebendesEinkommen_imputed = case_when(
massgebendesEinkommen_imputed < 0 ~ 0,
TRUE ~ massgebendesEinkommen_imputed
)) %>%
ggplot(aes(x = alter_v, y = anz_teilnahme)) +
# geom_point() +
# geom_jitter() +
geom_smooth() +
facet_wrap(~ Vermoegen_c) +
theme_minimal()
plot_scatter_agevermoegen
H3A/B
Identify the jumps
->Since we only have income per year –>participation as share (sum(partic)/sum(all_votes)) –>then look for jumps in yearly income –>compare share of participation before and after
Possible distinctions: 1) From no income to some income 2) From some income to no income 3) From some income to less than 50% of that income 4) From some income to less than 75% of that income
TODO: Do you have a recommendation for defining jumps of income/wealth? And should I address it seperately in the paper?
library(data.table)
# TODO: write function to just specify income/wealth and the wanted distinction of jumps
# get aggregated data by year for respondents
data_year <- data %>%
mutate(anz_teilnahme = case_when(
beteiligt == "mit Stimmbeteiligung" ~ 1,
TRUE ~ 0
)) %>%
mutate(anz_abstimmungen = case_when(
beteiligt == "mit Stimmbeteiligung" ~ 1,
beteiligt == "keine Stimmbeteiligung" ~ 1
)) %>%
group_by(id_ek, abstimmungsjahr) %>%
summarise(anz_teilnahme = sum(anz_teilnahme),
anz_abstimmungen = sum(anz_abstimmungen),
share_teilnahme = anz_teilnahme/anz_abstimmungen,
sex = last(sex_imp),
alter = mean(alter_v, na.rm=TRUE),
konfession = last(konfession_imp),
residenz = mean(residenz_imp, na.rm=TRUE),
zugezogen = last(zugezogen),
einkommen = mean(massgebendesEinkommen_imputed, na.rm=TRUE),
vermoegen = mean(reinvermoegen_imputed, na.rm=TRUE)
) %>%
distinct() %>%
ungroup()
# Identify individuals with bigger changes in income or wealth
data_year <- data_year %>%
group_by(id_ek) %>%
# calculate change compared to previous year
mutate(change_eink = 100 * (einkommen - lag(einkommen, default = NA))/lag(einkommen, default = NA)) %>%
mutate(change_verm = 100 * (vermoegen - lag(vermoegen, default = NA))/lag(vermoegen, default = NA)) %>%
# Identify big changes in income/wealth
# TODO: when is change considered to be big? (after 50% change?)
mutate(bigchange_eink = case_when(
change_eink >= 20 ~ 5,
change_eink >= 50 ~ 1,
change_eink <= -50 ~ 2,
TRUE ~ 0
)) %>%
mutate(bigchange_verm = case_when(
change_verm >= 20 ~ 5,
change_verm >= 50 ~ 1,
change_verm <= -50 ~ 2,
TRUE ~ 0
)) %>%
# create variable to detect individuals with treatment (i.e. big income drop)
# Only valid when also wealth dropped (otherwise just income shift)
mutate(treat_less50income = case_when(
bigchange_eink == 2 & bigchange_verm %in% c(0, 5) ~ 1,
TRUE ~ 0
)) %>%
mutate(treat_less50wealth = case_when(
bigchange_verm == 2 & bigchange_eink %in% c(0, 5) ~ 1,
TRUE ~ 0
))
data_year <- data_year %>%
# for multiple income/wealth shocks only take first one
group_by(id_ek) %>%
mutate(inc_change = as.numeric(rle(treat_less50income)$lengths %>% {rep(seq(length(.)), .)})) %>%
mutate(wealth_change = as.numeric(rle(treat_less50wealth)$lengths %>% {rep(seq(length(.)), .)})) %>%
ungroup() %>%
mutate(inc_change = case_when(
inc_change > 2 ~ 3,
TRUE ~ inc_change
)) %>%
mutate(wealth_change = case_when(
wealth_change > 2 ~ 3,
TRUE ~ wealth_change
)) %>%
mutate(treatment_year_inc = case_when(
inc_change == 2 ~ abstimmungsjahr,
TRUE ~ NA_integer_
)) %>%
mutate(treatment_year_wealth = case_when(
wealth_change == 2 ~ abstimmungsjahr,
TRUE ~ NA_integer_
)) %>%
group_by(id_ek) %>%
fill(treatment_year_inc, .direction = "updown") %>%
fill(treatment_year_wealth, .direction = "updown") %>%
mutate(treat_less50income = case_when(
!is.na(treatment_year_inc) ~ 1,
TRUE ~ 0
)) %>%
mutate(treat_less50wealth = case_when(
!is.na(treatment_year_wealth) ~ 1,
TRUE ~ 0
)) %>%
# create var years from/to treatment. For the never-treated (i.e. control) units,
# we'll set the "time_to_treatment" value at 0 for the middle year.
mutate(treatment_year_inc =
ifelse(is.na(treatment_year_inc), round(mean(abstimmungsjahr)), treatment_year_inc)) %>%
mutate(treatment_year_wealth =
ifelse(is.na(treatment_year_wealth), round(mean(abstimmungsjahr)), treatment_year_wealth)) %>%
mutate(time_to_treat_inc = ifelse(treat_less50income == 1, (abstimmungsjahr - treatment_year_inc), 0)) %>%
mutate(time_to_treat_wealth = ifelse(treat_less50wealth == 1, (abstimmungsjahr - treatment_year_wealth), 0))
Help: https://lost-stats.github.io/Model_Estimation/Research_Design/event_study.html
library(fixest)
# TODO: do I need to account for all control variables in the diff-in-diff model?
# -->how should I handle the categorical ones?
# income shocks
mod_twfe = feols(share_teilnahme ~ i(time_to_treat_inc, treat_less50income, ref = -1) + ## key interaction: time × treatment status
alter + einkommen | ## Other controls
id_ek + abstimmungsjahr, ## FEs
cluster = ~id_ek, ## Clustered SEs
data = data_year)
iplot(mod_twfe,
xlab = 'Time to treatment',
main = 'Event-Study of the Effects of Neg. Income Shocks on Voter Turnout (TWFE)')
# wealth shocks
mod_twfe2 = feols(share_teilnahme ~ i(time_to_treat_wealth, treat_less50income, ref = -1) + ## key interaction: time × treatment status
alter + vermoegen | ## Other controls
id_ek + abstimmungsjahr, ## FEs
cluster = ~id_ek, ## Clustered SEs
data = data_year)
iplot(mod_twfe2,
xlab = 'Time to treatment',
main = 'Event-Study of the Effects of Neg. Wealth Shocks on Voter Turnout (TWFE)')
The effect of income is very different depending on the age category!
plot_scatter_sex
TODO: Is there a better method to compare the effect of the independent on the dependent variable than just subsetting the group and looking at the estimates?
votes <- read.csv("/Users/alinelaurametzler/Documents/Universität/Master/Master Thesis/MA-Code/Data/PreparedData/votes_data_short.csv", encoding = "UTF-8")
# select votes with low/high importance (i.e. lowest/highest quartile of importance)
votes_lowimp <- votes %>%
filter(quantile(importance, 0.25) >= importance) %>%
select(datum, titel_kurz_d, jour, mois, annee, importance)
votes_highimp <- votes %>%
filter(quantile(importance, 0.75) <= importance) %>%
select(datum, titel_kurz_d, jour, mois, annee, importance)
# join with admin. data
turnout_lowimp <- votes_lowimp %>%
left_join(data, by = "datum")
turnout_highimp <- votes_highimp %>%
left_join(data, by = "datum")
# multinomial logit model
mlogit_lowimp <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = turnout_lowimp)
## # weights: 16 (15 variable)
## initial value 290321.230843
## iter 10 value 260635.182048
## iter 20 value 253380.654245
## iter 20 value 253380.652818
## iter 20 value 253380.652817
## final value 253380.652817
## converged
mlogit_highimp <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = turnout_highimp)
## # weights: 16 (15 variable)
## initial value 327324.199929
## iter 10 value 299060.223222
## iter 20 value 295444.989403
## iter 20 value 295444.989375
## iter 20 value 295444.989375
## final value 295444.989375
## converged
stargazer(mlogit_lowimp, mlogit_highimp, type = "text")
##
## ===============================================================
## Dependent variable:
## ----------------------------
## beteiligt
## (1) (2)
## ---------------------------------------------------------------
## alter_c31-45-Jährige -0.173*** -0.205***
## (0.012) (0.010)
##
## alter_c46-60-Jährige 0.240*** 0.104***
## (0.012) (0.010)
##
## alter_c61-75-Jährige 0.877*** 0.662***
## (0.012) (0.011)
##
## alter_cÜber 75-Jährige 0.646*** 0.304***
## (0.014) (0.013)
##
## sex_cMann 0.131*** 0.089***
## (0.007) (0.006)
##
## konfession_cChristliche Konfession 0.039*** 0.009
## (0.008) (0.007)
##
## residenz10Mehr als 10 Jahre 0.264*** 0.154***
## (0.009) (0.007)
##
## zugezogenCH zugezogen -0.604*** -0.677***
## (0.010) (0.009)
##
## mEinkommen_c25'000-55'000.- 0.049*** 0.081***
## (0.011) (0.010)
##
## mEinkommen_c55'000-90'000.- 0.412*** 0.426***
## (0.012) (0.010)
##
## mEinkommen_cÜber 90'000.- 0.851*** 0.829***
## (0.013) (0.011)
##
## Vermoegen_c8'000-60'000.- 0.386*** 0.434***
## (0.011) (0.009)
##
## Vermoegen_c60'000-185'000.- 0.945*** 0.931***
## (0.011) (0.010)
##
## Vermoegen_cÜber 185'000.- 0.665*** 0.873***
## (0.012) (0.011)
##
## Constant -1.800*** -1.055***
## (0.014) (0.012)
##
## ---------------------------------------------------------------
## Akaike Inf. Crit. 506,791.300 590,920.000
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# 10.2 Sozialversicherungen
# 6.1 Steuerwesen
votes_umverteilung <- votes %>%
filter(d1e2 %in% c(6.1, 10.2))
votes_steuern <- votes %>%
filter(d1e2 == 6.1)
# join with admin. data
turnout_umverteilung<- votes_umverteilung %>%
left_join(data, by = "datum")
turnout_steuern <- votes_steuern %>%
left_join(data, by = "datum")
# multinomial logit model
mlogit_umverteilung <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = turnout_umverteilung)
## # weights: 16 (15 variable)
## initial value 188387.699616
## iter 10 value 168350.965178
## iter 20 value 166105.694898
## iter 20 value 166105.694870
## iter 20 value 166105.694870
## final value 166105.694870
## converged
mlogit_steuern <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + mEinkommen_c + Vermoegen_c,
data = turnout_steuern)
## # weights: 16 (15 variable)
## initial value 27474.967943
## iter 10 value 24100.346412
## final value 23799.429944
## converged
stargazer(mlogit_umverteilung, mlogit_steuern, type = "text")
##
## ===============================================================
## Dependent variable:
## ----------------------------
## beteiligt
## (1) (2)
## ---------------------------------------------------------------
## alter_c31-45-Jährige -0.159*** -0.137***
## (0.014) (0.037)
##
## alter_c46-60-Jährige 0.290*** 0.310***
## (0.014) (0.037)
##
## alter_c61-75-Jährige 0.896*** 1.015***
## (0.015) (0.040)
##
## alter_cÜber 75-Jährige 0.607*** 0.797***
## (0.017) (0.045)
##
## sex_cMann 0.085*** 0.114***
## (0.008) (0.023)
##
## konfession_cChristliche Konfession -0.003 -0.041
## (0.010) (0.025)
##
## residenz10Mehr als 10 Jahre 0.144*** 0.042
## (0.010) (0.028)
##
## zugezogenCH zugezogen -0.694*** -0.694***
## (0.012) (0.031)
##
## mEinkommen_c25'000-55'000.- 0.054*** -0.025
## (0.013) (0.035)
##
## mEinkommen_c55'000-90'000.- 0.391*** 0.309***
## (0.014) (0.037)
##
## mEinkommen_cÜber 90'000.- 0.791*** 0.781***
## (0.015) (0.040)
##
## Vermoegen_c8'000-60'000.- 0.420*** 0.419***
## (0.013) (0.033)
##
## Vermoegen_c60'000-185'000.- 0.943*** 1.041***
## (0.013) (0.033)
##
## Vermoegen_cÜber 185'000.- 1.024*** 0.000
## (0.017) (0.000)
##
## Constant -1.362*** -1.362***
## (0.017) (0.043)
##
## ---------------------------------------------------------------
## Akaike Inf. Crit. 332,241.400 47,626.860
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Compare households with high income/wealth to households with low income wealth
Compare highest earning person of household to other members that have less income/wealth –>Especially look at adult children that still live with parents
# TODO: can I just define children by their age group (18-25) living with older age group (35-65)?
# -->what about couples, shared flats with bigger age gaps? (right now I just ignore this)
data_households <- data %>%
ungroup() %>%
filter(haushalttyp_sw_2 == "Mehrpersonenhaushalte") %>%
filter(generationen_sw %in% c("Zwei Generationen", "Drei Generationen", "Vier Generationen")) %>%
mutate(young_pers = case_when(
alter_v <= 25 ~ 1,
TRUE ~ 0
)) %>%
mutate(older_pers = case_when(
alter_v > 35 & alter_v <= 65 ~ 1,
TRUE ~ 0
)) %>%
group_by(householdID_sw) %>%
# all housholds (mind. 2 generations) with young people and older people in them
filter(sum(young_pers) >= 1 & sum(older_pers) >= 1) %>%
# create variable to sum income of parents (=older_pers in household)
group_by(householdID_sw, older_pers) %>%
mutate(income_older = case_when(
# married couples have joined incomes
steuer_tarif_imputed == "verheiratet" & older_pers == 1 ~ massgebendesEinkommen_imputed,
steuer_tarif_imputed == "alleinstehend/unverheiratet" & older_pers == 1 ~ sum(massgebendesEinkommen_imputed) ,
TRUE ~ NA_real_
)) %>%
ungroup() %>%
group_by(householdID_sw) %>%
fill(income_older, .direction = "updown") %>%
filter(young_pers == 1) %>%
ungroup() %>%
filter(!is.na(income_older)) %>%
# take one random vote date per individual
group_by(id_ek) %>%
sample_n(1)
# households with low income of parents
data_households_low <- data_households %>%
ungroup() %>%
filter(quantile(income_older, 0.25) >= income_older)
mlogit_household_low <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + income_older+ mEinkommen_c + Vermoegen_c,
data = data_households_low)
## # weights: 17 (16 variable)
## initial value 2111.326312
## iter 10 value 1611.518528
## final value 1587.445651
## converged
# households with high income of parents
data_households_high <- data_households %>%
filter(quantile(income_older, 0.75) <= income_older)
mlogit_household_high <- multinom(beteiligt ~ alter_c + sex_c + konfession_c + residenz10 + zugezogen + income_older + mEinkommen_c + Vermoegen_c,
data = data_households_high)
## # weights: 17 (16 variable)
## initial value 8556.901944
## iter 10 value 7737.657252
## iter 20 value 7387.679024
## final value 7387.669354
## converged
stargazer(mlogit_household_low, mlogit_household_high, type = "text")
##
## ===============================================================
## Dependent variable:
## ----------------------------
## beteiligt
## (1) (2)
## ---------------------------------------------------------------
## alter_c31-45-Jährige 0.000 0.000
## (0.000) (0.000)
##
## alter_c46-60-Jährige 0.000 0.000
## (0.000) (0.000)
##
## alter_c61-75-Jährige 0.000 0.000
## (0.000) (0.000)
##
## alter_cÜber 75-Jährige 0.000 0.000
## (0.000) (0.000)
##
## sex_cMann -0.044*** -0.048***
## (0.000) (0.000)
##
## konfession_cChristliche Konfession 0.240*** 0.336***
## (0.000) (0.000)
##
## residenz10Mehr als 10 Jahre 0.480*** 0.404***
## (0.000) (0.000)
##
## zugezogenCH zugezogen -0.166*** 0.029***
## (0.000) (0.000)
##
## income_older 0.00000*** 0.00000**
## (0.00000) (0.00000)
##
## mEinkommen_c25'000-55'000.- -0.359*** -0.712***
## (0.000) (0.000)
##
## mEinkommen_c55'000-90'000.- -0.600*** -0.637***
## (0.000) (0.000)
##
## mEinkommen_cÜber 90'000.- -1.533*** -1.220***
## (0.000) (0.000)
##
## Vermoegen_c8'000-60'000.- 0.575*** 0.639***
## (0.000) (0.000)
##
## Vermoegen_c60'000-185'000.- 0.942*** 1.078***
## (0.000) (0.000)
##
## Vermoegen_cÜber 185'000.- 1.427*** 1.264***
## (0.000) (0.000)
##
## Constant -1.957*** -1.507***
## (0.000) (0.000)
##
## ---------------------------------------------------------------
## Akaike Inf. Crit. 3,198.891 14,799.340
## ===============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01